# -*- coding:UTF-8 -*- 
"""
@Project:   DataCrawler
@FileName:  downloader.py 
@CreateDate:2023/4/22 23:27  
@Author:    Jia  
@Desc:      URL downloader (fetches page HTML synchronously or asynchronously)
"""
import requests
import asyncio
import aiohttp
from Common import file_method


class HtmlDownloader:
    """Download page HTML either synchronously (requests) or asynchronously (aiohttp)."""

    # Maximum number of concurrent async downloads.
    _MAX_CONCURRENCY = 5

    def __init__(self, logger):
        """
        :param logger: logger used to report failed downloads
        """
        # Request headers come from the project config file — TODO confirm
        # the relative path is valid from every caller's working directory.
        data = file_method.FileMethod().read_yaml('../Config/TianYanCha.yaml')
        self.head = data['head']
        self.logger = logger
        self.num = 1
        # Created lazily inside a running event loop (see async_download).
        # A fresh Semaphore per call would never actually limit concurrency,
        # so one instance must be shared across all calls.
        self._semaphore = None

    def download(self, url):
        """Synchronously fetch *url* and return the response body.

        :param url: page URL to fetch
        :return: response text, or None when the status code is not 200
        """
        # A timeout prevents the call from hanging forever on a dead server.
        res = requests.get(url, headers=self.head, timeout=30)
        if res.status_code != 200:
            return None
        return res.text

    async def async_download(self, url):
        """Asynchronously fetch *url*, write the body to disk and return it.

        :param url: page URL to fetch
        :return: response text, or None when the status code is not 200
        """
        if self._semaphore is None:
            # Lazy init so the semaphore is created under the running loop
            # and shared by every call, capping real concurrency.
            self._semaphore = asyncio.Semaphore(self._MAX_CONCURRENCY)
        async with self._semaphore:
            async with aiohttp.ClientSession() as session:
                # `async with` guarantees the response/connection is
                # released even if reading the body raises.
                async with session.get(url, headers=self.head) as res:
                    if res.status != 200:
                        self.logger.info(f'第{self.num}个爬取失败的URL是：{url}')
                        return None
                    res_text = await res.text(encoding='utf-8')
                    file_method.FileMethod().write_file('../Data/res_text2.html', res_text)
                    # Brief pause to stay polite to the target site.
                    await asyncio.sleep(0.5)
                    return res_text


if __name__ == '__main__':
    from Config import log

    logger = log.Logs().debug_logger()
    h = HtmlDownloader(logger)
    url = 'https://www.tianyancha.com/company/5233340769'
    # asyncio.run() creates, runs and closes the event loop itself.
    # The old ensure_future()/get_event_loop()/run_until_complete() pattern
    # is deprecated since Python 3.10 (ensure_future was being called here
    # before any loop existed).
    asyncio.run(h.async_download(url))
